# -*- coding: utf-8 -*-
import scrapy
import json
from ..items import  FirstItem
class FirstSpider(scrapy.Spider):
    """Spider that starts at the qidian.com home page and follows its links.

    For every ``<a href=...>`` found on a page it emits a ``FirstItem``
    carrying the link and schedules the linked page to be parsed by the
    same callback.
    """

    name = 'FirstSpider'
    allowed_domains = ['qidian.com']
    start_urls = ['https://www.qidian.com/']

    def parse(self, response):
        """Yield an item for each link on the page, then a request to follow it.

        Args:
            response: The downloaded page whose anchors are scanned.

        Yields:
            A ``FirstItem`` whose ``url`` field holds the anchor's ``href``
            value as written in the page, followed by a ``scrapy.Request``
            for the resolved absolute URL when that URL is http(s).
        """
        # Select the href attribute directly instead of slicing the raw tag
        # text: the first quoted attribute of an <a> is not necessarily href,
        # and an anchor without any quote would raise ValueError.
        for href in response.xpath("//a/@href").extract():
            href = href.strip()
            if not href:
                continue

            item = FirstItem()
            item['url'] = href
            yield item

            # Some links are relative; resolve them against the current page
            # so root-relative, page-relative and protocol-relative forms all
            # produce a valid absolute URL.
            target = response.urljoin(href)

            # Skip javascript:, mailto:, tel: and similar pseudo-links, which
            # cannot be downloaded.
            if target.startswith(("http://", "https://")):
                # The key step: hand the next crawl task to the scheduler.
                yield scrapy.Request(target, callback=self.parse)
